* Housekeeping =================================================================
clear all
* Make sure this multi-hour batch run never pauses at a -more- prompt
set more off

* Globals ======================================================================
* Folder layout used by every do-file called from this master file. Each path
* ends with a backslash so that a file name can be appended directly to the
* global (e.g. the dofiles global followed by fig2.do).
*
* Replicators: edit the two root paths below; all other paths are derived.

* Raw source data (e.g. the motor vehicle register files)
global rawdata 			"N:\M3642\Data\"
* Root folder of the replication package
global projectroot 		"L:\M3642\felles\IsaksenJohansenReplication\"

* Sub-folders of the replication package
global additionaldata 	"${projectroot}additional_data\"
global newdata 			"${projectroot}datasets\"
global dofiles 			"${projectroot}do_files\"
global figures 			"${projectroot}figures\"
global tables 			"${projectroot}tables\"
global ster 			"${projectroot}ster_files\"

* Point the PLUS system directory (user-written packages) at the project copy
sysdir set PLUS "${projectroot}ado_files\plus"

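* If the following packages are not in your PLUS directory, uncomment the lines below.
* NOTE(review): the original third line read "ssc install spmaps"; the SSC mapping
* package is believed to be named "spmap" -- confirm against the command used in fig3.do.
*ssc install estout
*ssc install reghdfe
*ssc install spmap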

* Run do-files =================================================================

* Step one: read in raw data and create general datasets 
* (households and car ownership in Norway)

* ==============================================================================

/* 01: Imports and appends the annual car-ownership data from the source files.
	Also fixes some inconsistencies over time (e.g. time-invariant variables
	that change in the raw data).
	
	Input datasets: 
	${rawdata}mreg_g[year] and ${rawdata}mreg_org_g[year]
	 - raw data from the motor vehicle register
	
	Output dataset:
	${newdata}lnr_regnr_year
	 - one observation per car per year, including car attributes and owner id
*/
do "${dofiles}01_formatting_car_ownership.do" // Run time: approx. 2.5 hours

/* 02: Creates a timeline for each car so that cars are treated consistently
	in case we want to follow cars, rather than individuals, over time.
	First, a dataset at the car-event level is created. Then a dataset is created
	in which one observation is the time period between two events (see under
	"Output datasets" for more information). This ensures that (1) we keep track
	of all ownership changes for each car over time and (2) we are able to fix
	errors in the raw data (e.g. cars reported as still part of the car fleet
	after their scrap date).
	
	Input datasets: 
	${newdata}lnr_regnr_year
	- created in do-file #1 
	
	${rawdata}kjl[year]
	- raw data on odometer readings from EU controls
	
	Output datasets: 
	${newdata}timeline
	- one observation per car per event, incl. the time stamp of the event.
	  Events are one of the following: "new purchase", "import",
	  "de-registration", "re-registration", "EU control", "export" or
	  "scrapping". The dataset includes the owner id, which should only change
	  during a re-registration event.
	  
	${newdata}car_owner_event
	- one observation denotes the time period between two events for a car.
	  The first event of a time period can be "new purchase", "import",
	  "de-registration", "re-registration" or "EU control". Re-registration
	  will in some cases, but not all, imply that the owner has changed
	  (second-hand sale).
	  The last event of a time period can be "de-registration",
	  "re-registration", "EU control", "export" or "scrapping". If nothing
	  happens to the car, the last event of the last time period is "keep".
*/
do "${dofiles}02_creating_car_events.do" // Run time: approx. 1 hour

/* 03: First expands the events to annual observations, then merges events
	that happen during the same year. This changes the unit of observation
	from "car*event period" to "car*year".
	
	Input dataset:
	${newdata}car_owner_event
	- created in do-file #2
	
	Output dataset:
	${newdata}car_stock_end_of_year
	- one observation per car per year, including the owner id of the last
	  person owning the vehicle that year.
*/
do "${dofiles}03_creating_end_of_year_stock.do" // Run time: approx. 30 minutes

/* 04: Builds the general household-year dataset. The do-file:
	1 Imports a SAS file at the individual-year level that includes the main
	  demographics.
	2 Merges the main employer each year onto each individual, incl. the
	  neighborhood of the firm.
	3 Merges distances and toll exposure onto each individual-year combination,
	  based on the residence-workplace combination. 
	4 Keeps at most two adults for each household-year, and reshapes the data 
	  to wide format (household-year level instead of individual-year level).
	5 Merges up to three cars onto each household, based on individual-level
	  annual records of car ownership at the end of the year. If the household
	  owns more than three cars, the three newest (i.e. youngest) cars are kept.
	6 Merges on public transport information for the neighborhood pairs of
	  the (up to) two home-workplace combinations per household.

	Input datasets:
	${newdata}househ_wp2
	- individual attributes at the individual-family-year level
	
	${newdata}utvalg_atmlto
	- all annual employer-employee registrations (potentially several per
	  individual)
	
	${additionaldata}distances
	- the distance and toll exposure between neighborhood pairs. While distances
	  are static, toll exposure varies by year according to changes in toll
	  rates. 
	  
	${newdata}househ_select
	- for each individual each year, a link to the ID of the household that
	  individual belongs to 
	  
	${newdata}car_stock_end_of_year
	- created in do-file #03
	
	${additionaldata}kollektivtransport_grk
	- variables for public transport attributes between neighborhoods, e.g.
	  time on board, time walking to/from stations, etc. The dataset only 
	  includes one-way trips (i.e. the neighborhood*neighborhood matrix 
	  is triangular), so to-from and from-to attributes have to be assumed
	  to be the same.
	
	Output dataset:
	${newdata}hh_3_cars_endofyear
	- unit of observation is household-year. Each household can have up to two
	  adult household members (and attribute values for these) and up to three 
	  cars (and attribute values for these)
*/
do "${dofiles}04_creating_data_hh_endofyear.do" // Run time: approx. 3 hours

* ==============================================================================
* Step two: Create project specific dataset based on the files created above
* ==============================================================================

/* 05: Builds the project-specific analysis dataset. The do-file:
	- Loads a dataset of distances between neighborhoods to determine which
	  neighborhoods are located in proximity to the four cities selected for
	  the analysis.
	- Loads the general dataset created in do-file #4, and keeps households 
	  that lived within 50 kilometers of those cities in 2014.
	- Keeps observations according to other data selection criteria:
	  * the household has at least one working individual and at least one
	    observed work distance 
	  * the work distance is between 5 and 50 kilometers 
	- Creates household-level control variables to be used in the analysis.
	- Creates outcome variables contingent on households' car ownership status.
	- Drops several variables from the dataset created in do-file #4 that are
	  not needed.
	- Defines treatment status based on location and toll payments in 2014:
	  * Paying commuter, Bergen: within 50 km of Bergen and passes the toll
	    cordon, i.e. one of the household members had a toll rate of 25 NOK
	    in 2014.
	  * Non-paying commuter, Bergen: within 50 km of Bergen and the toll rate
	    is 0.
	  * Paying commuter, other cities: within 50 km of Stavanger, Haugesund or 
	    Kristiansand, and passes the toll cordon, i.e. one of the household 
	    members paid 20/14/21 kroner (for each city, respectively) in 2014.
	  * Non-paying commuter, other cities: within 50 km of Stavanger, Haugesund
	    or Kristiansand, and the toll rate is 0.
		
	Input datasets:
	${newdata}hh_3_cars_endofyear
	- created in do-file #04
	
	${additionaldata}distances
	- used for determining the distance between neighborhoods in Norway and
	  the four cities selected for the analysis
	  
	Output dataset:
	${newdata}carownership_dataset_bergen.dta
	- The main dataset used in the analyses below. 
	- Unit of observation is household-year. Each household can have up to two
	  adult household members (and attribute values for these) and up to three 
	  cars (and attribute values for these).
	- A subset of the Norwegian population according to the selection criteria
	  in the paper.
	- Includes the variable "group", which defines treatment status.  
	
*/
do "${dofiles}05_creating_project_specific_data.do" // Run time: approx. 15 minutes

* ==============================================================================
* Step three: Conduct analyses. Create tables and figures.
* ==============================================================================

* === Setting global parameters for analysis ===================================

* Sample window. The pre-period runs from firstyearpre to lastyearpre (the
* base year); the post-period runs from firstyearpost to lastyearpost.
global firstyearpre 2011
global lastyearpre 2014
global firstyearpost 2016
global lastyearpost 2017

* Name of the variable that defines membership of the treatment and control
* groups (four categories)
global trmgroup group

* Control variables used in the base regressions. The list is assembled piece
* by piece so that each group of terms sits on its own short line; the final
* content of xvar is one space-separated list of 14 terms.
global xvar i.couple##c.age##c.age i.couple##c.kvinne i.couple##i.children
global xvar ${xvar} antpers_i_regstat_famnr secondhome
global xvar ${xvar} c.dist##c.dist##i.couple c.time_work##c.time_work##i.couple
global xvar ${xvar} c.employed##i.couple c.retired##i.couple
global xvar ${xvar} c.wies##c.wies##i.couple c.wealth##c.wealth##i.couple
global xvar ${xvar} c.PublicVSCarTime_fam_mean##c.PublicVSCarTime_fam_mean##i.couple
global xvar ${xvar} c.PublicDiffCarTime_fam_mean##c.PublicDiffCarTime_fam_mean##i.couple
global xvar ${xvar} i.max_educ##i.couple

/* "global yvar" is specified within or in relation to each of the do-files below */

* === Running base regressions =================================================

/* Base regressions: one pass of base_regressions.do per outcome variable, with
   the current outcome passed through global yvar.
   Original note: runs five regressions with varying fixed effects for Table 3
   and Table D2.
   NOTE(review): tabD4.do is the appendix script described as reading the ster
   files made by base_regressions.do -- confirm whether "Table D2" should read
   "Table D4". */
local base_outcomes bev nrbev ice cars
foreach outcome of local base_outcomes {
	global yvar `outcome'
	do "${dofiles}base_regressions.do" // Run time: 15 minutes
}

/* === Creating figures for the main text =================================== */

* Figure 2: EV ownership over time among the four treatment groups
do "${dofiles}fig2.do" // Run time: 2 minutes

* Figure 3: maps of paying/non-paying commuter shares in Bergen municipality
do "${dofiles}fig3.do" // Run time: 1 minute

* Figure 4: dynamic difference-in-differences figures, one run per outcome
* variable (passed through global yvar). This do-file also makes figure C.2.
local fig4_outcomes bev ice cars
foreach outcome of local fig4_outcomes {
	global yvar `outcome'
	do "${dofiles}fig4.do" // Run time: 5 minutes
}

* Figure 5: plots heterogeneous treatment effects for pr(BEV) and stores the
* estimates
do "${dofiles}fig5.do" // Run time: 25 minutes

/* === Creating tables for the main text ==================================== */

/* Table 2: produces summary statistics for 2014 per treatment group */
do "${dofiles}tab2.do" /* Run time: 20 seconds */

/* Table 3: reads the ster files made in base_regressions.do and saves the
   results to a tex file */
do "${dofiles}tab3.do" /* Run time: 10 seconds */

/* Table 4: effects on moving and job change */
do "${dofiles}tab4.do" /* Run time: 10 seconds */

/* === ONLINE APPENDIX ====================================================== */

/* 05b: Creates a replication of the project-specific dataset, but KEEPS
   observations where the work distance is below 5 km as part of the sample.
   This dataset is used for robustness checks only.

   The .do extension is written out explicitly to match every other call in
   this file; Stata assumes .do when no extension is given, so the same file
   is run. */
do "${dofiles}05b_project_specific_data_incl_short_wd.do" /* Run time: 15 minutes */

/* === Creating figures and tables for the online appendix ================== */

* Figure C.1: double-difference figures, one run of figC1.do per outcome
* variable (passed through global yvar)
local figC1_outcomes bev nrbev ice cars
foreach outcome of local figC1_outcomes {
	global yvar `outcome'
	do "${dofiles}figC1.do" // Run time: 5 minutes
}

/* Table D1: runs regressions with bev as outcome, with the shortest work
   distance threshold at intervals 2(1)5; saves to tex */
do "${dofiles}tabD1.do" /* Run time: 15 minutes */

/* Table D2: runs regressions for observations where the work distance is
   below 5; saves to tex */
do "${dofiles}tabD2.do" /* Run time: 5 minutes */

/* Table D3: runs regressions by residence/workplace location */
do "${dofiles}tabD3.do" /* Run time: 15 minutes */

/* Table D4: reads the ster files made in the "base_regressions" do-file and
   saves to a tex file */
do "${dofiles}tabD4.do" /* Run time: 10 seconds */

/* Table D5: runs regressions when including neighborhood-year-work distance
   slopes */
do "${dofiles}tabD5.do" /* Run time: 15 minutes */

/* Table E1: table of heterogeneous treatment effects, built from estimates
   stored by an earlier do-file. The original note names "fig4.do".
   NOTE(review): fig5.do is the script described in this file as storing
   heterogeneous-effect estimates -- confirm which do-file is meant. */
do "${dofiles}tabE1.do" /* Run time: 20 seconds */

/* Figure E1: heterogeneous effects for pr(BEV), where demographics are
   interacted with belonging to the top/bottom income quintile */
do "${dofiles}figE1.do" /* Run time: 15 minutes */

/* Figure E2: heterogeneous effects for ICE vehicles */
do "${dofiles}figE2.do" /* Run time: 25 minutes */

/* Figure E3: heterogeneous effects for total car ownership */
do "${dofiles}figE3.do" /* Run time: 25 minutes */




